# This script reproduces Table 10 #

rm(list=ls())
options(digits=4)
pkg <- c("PanelMatch", "ggplot2", "plm", 
         "matrixStats",
         "foreign", 
         "stringi", "stringdist",
         "DataCombine","lmtest", "multiwayvcov",
         "data.table",
         "dplyr", 
         "stargazer",
         "clubSandwich", "mediation",
         "doBy")
lapply(pkg, require, character.only = TRUE)

# load datasets
proprietary <- read.csv("proprietary.csv")
load("non_p.RData")
source("functions.R")
# merge 
d_sub1 <- merge(d_sub1, proprietary, by = c("county_id", "time"), all.x = T)

d1 <- d_sub1

corruption <- readstata13::read.dta13("prefecture_panel.dta") # loading the dta from Chen & Kung
colnames(corruption)[colnames(corruption) == "prefid"] <- "city_id"
corruption <- corruption[which(corruption$ps == 1),]

city_pd <- d_sub1 %>% group_by(city_id, year) %>% 
  dplyr::summarize( bpd_1500_sum_y = sum(bpd_1500_sum, na.rm = T),
                    bpd_1500_mean_y = mean(bpd_1500_mean, na.rm = T),
                    
                    fpd_1500_sum_y = sum(fpd_1500_sum, na.rm = T),
                    fpd_1500_mean_y = mean(fpd_1500_mean, na.rm = T),
                    
                    bpd_500_sum_y = sum(bpd_500_sum, na.rm = T),
                    bpd_500_mean_y = mean(bpd_500_mean, na.rm = T),
                    
                    fpd_500_sum_y = sum(fpd_500_sum, na.rm = T),
                    fpd_500_mean_y = mean(fpd_500_mean, na.rm = T),
                    
                    bpd_1000_sum_y = sum(bpd_1000_sum, na.rm = T),
                    bpd_1000_mean_y = mean(bpd_1000_mean, na.rm = T),
                    
                    fpd_1000_sum_y = sum(fpd_1000_sum, na.rm = T),
                    fpd_1000_mean_y = mean(fpd_1000_mean, na.rm = T),
                    ##
                    bpd_pa_1500_sum_y = sum(bpd_pa_1500_sum, na.rm = T),
                    bpd_pa_1500_mean_y = mean(bpd_pa_1500_mean, na.rm = T),
                    
                    fpd_pa_1500_sum_y = sum(fpd_pa_1500_sum, na.rm = T),
                    fpd_pa_1500_mean_y = mean(fpd_pa_1500_mean, na.rm = T),
                    
                    bpd_pa_500_sum_y = sum(bpd_pa_500_sum, na.rm = T),
                    bpd_pa_500_mean_y = mean(bpd_pa_500_mean, na.rm = T),
                    
                    fpd_pa_500_sum_y = sum(fpd_pa_500_sum, na.rm = T),
                    fpd_pa_500_mean_y = mean(fpd_pa_500_mean, na.rm = T),
                    
                    bpd_pa_1000_sum_y = sum(bpd_pa_1000_sum, na.rm = T),
                    bpd_pa_1000_mean_y = mean(bpd_pa_1000_mean, na.rm = T),
                    
                    fpd_pa_1000_sum_y = sum(fpd_pa_1000_sum, na.rm = T),
                    fpd_pa_1000_mean_y = mean(fpd_pa_1000_mean, na.rm = T),
                    
                    pdd_1000_mean_y = mean(pdd_1000_mean, na.rm = T),
                    pdd_1000_sum_y = sum(pdd_1000_sum, na.rm = T),
                    
                    pdd_500_mean_y = mean(pdd_500_mean, na.rm = T),
                    pdd_500_sum_y = sum(pdd_500_sum, na.rm = T),
                    
                    pdd_1500_mean_y = mean(pdd_1500_mean, na.rm = T),
                    pdd_1500_sum_y = sum(pdd_1500_sum, na.rm = T),
                    ##
                    pdd_pa_1000_mean_y = mean(pdd_pa_1000_mean, na.rm = T),
                    pdd_pa_1000_sum_y = sum(pdd_pa_1000_sum, na.rm = T),
                    
                    pdd_pa_500_mean_y = mean(pdd_pa_500_mean, na.rm = T),
                    pdd_pa_500_sum_y = sum(pdd_pa_500_sum, na.rm = T),
                    
                    pdd_pa_1500_mean_y = mean(pdd_pa_1500_mean, na.rm = T),
                    pdd_pa_1500_sum_y = sum(pdd_pa_1500_sum, na.rm = T),
                    
                    abs_resid_log_begi_price_mean_y = mean(abs_resid_log_begi_price_mean, na.rm = T),
                    abs_resid_log_final_price_mean_y = mean(abs_resid_log_final_price_mean, na.rm = T),
                    abs_resid_log_begi_price_pa_mean_y = mean(abs_resid_log_begi_price_pa_mean, na.rm = T),
                    abs_resid_log_final_price_pa_mean_y = mean(abs_resid_log_final_price_pa_mean, na.rm = T),
                    abs_resid_log_price_difference_mean_y = mean(abs_resid_log_price_difference_mean, na.rm = T),
                    abs_resid_log_price_difference_pa_mean_y = mean(abs_resid_log_price_difference_pa_mean, na.rm = T)
  )

city_pd <- merge(city_pd, corruption, by = c("city_id", "year"))

city_pd$pdd_1500_sum_y[which(city_pd$pdd_1500_sum_y == "NaN")] <- NA
city_pd$pdd_1500_mean_y[which(city_pd$pdd_1500_mean_y == "NaN")] <- NA
city_pd$pdd_1000_mean_y[which(city_pd$pdd_1000_mean_y == "NaN")] <- NA
city_pd$pdd_1000_sum_y[which(city_pd$pdd_1000_sum_y == "NaN")] <- NA
city_pd$pdd_1000_sum_y[which(city_pd$pdd_1000_sum_y == "NaN")] <- NA
city_pd$pdd_500_sum_y[which(city_pd$pdd_500_sum_y == "NaN")] <- NA
city_pd$pdd_500_mean_y[which(city_pd$pdd_500_mean_y == "NaN")] <- NA

city_pd$pdd_pa_1500_sum_y[which(city_pd$pdd_pa_1500_sum_y == "NaN")] <- NA
city_pd$pdd_pa_1500_mean_y[which(city_pd$pdd_pa_1500_mean_y == "NaN")] <- NA
city_pd$pdd_pa_1000_mean_y[which(city_pd$pdd_pa_1000_mean_y == "NaN")] <- NA
city_pd$pdd_pa_1000_sum_y[which(city_pd$pdd_pa_1000_sum_y == "NaN")] <- NA
city_pd$pdd_pa_1000_sum_y[which(city_pd$pdd_pa_1000_sum_y == "NaN")] <- NA
city_pd$pdd_pa_500_sum_y[which(city_pd$pdd_pa_500_sum_y == "NaN")] <- NA
city_pd$pdd_pa_500_mean_y[which(city_pd$pdd_pa_500_mean_y == "NaN")] <- NA

city_pd$bpd_1500_sum_y[which(city_pd$bpd_1500_sum_y == "NaN")] <- NA
city_pd$bpd_1500_mean_y[which(city_pd$bpd_1500_mean_y == "NaN")] <- NA
city_pd$bpd_1000_mean_y[which(city_pd$bpd_1000_mean_y == "NaN")] <- NA
city_pd$bpd_1000_sum_y[which(city_pd$bpd_1000_sum_y == "NaN")] <- NA
city_pd$bpd_1000_sum_y[which(city_pd$bpd_1000_sum_y == "NaN")] <- NA
city_pd$bpd_500_sum_y[which(city_pd$bpd_500_sum_y == "NaN")] <- NA
city_pd$bpd_500_mean_y[which(city_pd$bpd_500_mean_y == "NaN")] <- NA

city_pd$bpd_pa_1500_sum_y[which(city_pd$bpd_pa_1500_sum_y == "NaN")] <- NA
city_pd$bpd_pa_1500_mean_y[which(city_pd$bpd_pa_1500_mean_y == "NaN")] <- NA
city_pd$bpd_pa_1000_mean_y[which(city_pd$bpd_pa_1000_mean_y == "NaN")] <- NA
city_pd$bpd_pa_1000_sum_y[which(city_pd$bpd_pa_1000_sum_y == "NaN")] <- NA
city_pd$bpd_pa_1000_sum_y[which(city_pd$bpd_pa_1000_sum_y == "NaN")] <- NA
city_pd$bpd_pa_500_sum_y[which(city_pd$bpd_pa_500_sum_y == "NaN")] <- NA
city_pd$bpd_pa_500_mean_y[which(city_pd$bpd_pa_500_mean_y == "NaN")] <- NA

city_pd$fpd_1500_sum_y[which(city_pd$fpd_1500_sum_y == "NaN")] <- NA
city_pd$fpd_1500_mean_y[which(city_pd$fpd_1500_mean_y == "NaN")] <- NA
city_pd$fpd_1000_mean_y[which(city_pd$fpd_1000_mean_y == "NaN")] <- NA
city_pd$fpd_1000_sum_y[which(city_pd$fpd_1000_sum_y == "NaN")] <- NA
city_pd$fpd_1000_sum_y[which(city_pd$fpd_1000_sum_y == "NaN")] <- NA
city_pd$fpd_500_sum_y[which(city_pd$fpd_500_sum_y == "NaN")] <- NA
city_pd$fpd_500_mean_y[which(city_pd$fpd_500_mean_y == "NaN")] <- NA

city_pd$fpd_pa_1500_sum_y[which(city_pd$fpd_pa_1500_sum_y == "NaN")] <- NA
city_pd$fpd_pa_1500_mean_y[which(city_pd$fpd_pa_1500_mean_y == "NaN")] <- NA
city_pd$fpd_pa_1000_mean_y[which(city_pd$fpd_pa_1000_mean_y == "NaN")] <- NA
city_pd$fpd_pa_1000_sum_y[which(city_pd$fpd_pa_1000_sum_y == "NaN")] <- NA
city_pd$fpd_pa_1000_sum_y[which(city_pd$fpd_pa_1000_sum_y == "NaN")] <- NA
city_pd$fpd_pa_500_sum_y[which(city_pd$fpd_pa_500_sum_y == "NaN")] <- NA
city_pd$fpd_pa_500_mean_y[which(city_pd$fpd_pa_500_mean_y == "NaN")] <- NA

city_pd$abs_resid_log_begi_price_mean_y[which(city_pd$abs_resid_log_begi_price_mean_y == "NaN")] <- NA
city_pd$abs_resid_log_final_price_mean_y[which(city_pd$abs_resid_log_final_price_mean_y == "NaN")] <- NA
city_pd$abs_resid_log_begi_price_pa_mean_y[which(city_pd$abs_resid_log_begi_price_pa_mean_y == "NaN")] <- NA
city_pd$abs_resid_log_final_price_pa_mean_y[which(city_pd$abs_resid_log_final_price_pa_mean_y == "NaN")] <- NA
city_pd$abs_resid_log_price_difference_mean_y[which(city_pd$abs_resid_log_price_difference_mean_y == "NaN")] <- NA
city_pd$abs_resid_log_price_difference_pa_mean_y[which(city_pd$abs_resid_log_price_difference_pa_mean_y == "NaN")] <- NA

city_pd <- taking_log("discount", city_pd)
city_pd$discount <- standardize(city_pd$discount)
city_pd$log_discount <- standardize(city_pd$log_discount)

var_names <- colnames(city_pd)[3:44]
for (i in 1:length(var_names)) {
  city_pd[, var_names[i]] <- standardize(city_pd[, var_names[i]])
}


var_names <- c("fpd_1500_mean_y", "fpd_1000_mean_y", "fpd_500_mean_y",
               "abs_resid_log_final_price_mean_y")
# divide them by a huge amount so that the regression tables 
# won't show too many zeros for the coefficients of these 
# variables
city_pd$fixedassetinv <- city_pd$fixedassetinv/100000000
city_pd$realestateinv <- city_pd$realestateinv/100000000
models <- list()
for (i in 1:length(var_names)) {
  formula1 <- as.formula(paste0(var_names[i], "~", "discount", 
                                "+lngdppc + gdpgrowth + fixedassetinv + lnpop + 
                                realestateinv"))
  model_area_con <- plm(formula = formula1,
                        index = c("city_id", "year"),
                        effect = "twoways", model = "within",
                        data = city_pd)
  
  vcov_area_con <- clubSandwich::vcovCR(model_area_con, type="CR1S", cluster = city_pd$city_id)
  se <- coeftest(model_area_con, vcov_area_con)[,2]
  models[[i]] <- list("model" = model_area_con, 
                      "se" = se)
}

sink("output/Table10.tex")
stargazer(models[[1]]$model, models[[2]]$model, models[[3]]$model,
          models[[4]]$model,
          font.size = "large", digits = 4, 
          type = "latex",
          dep.var.labels = c("Residualized Price Distance,
                             1500m radius",
                             "Residualized Price Distance,
                             1000m radius",
                             "Residualized Price Distance,
                             500m radius",
                             "Residuals from Spatial LASSO"),
          column.labels = c("1500m radius residuals", 
                            "1000m radius residuals",
                            "500m radius residuals",
                            "Residuals from Spatial LASSO"),
          dep.var.labels.include = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n", "rsq"),
          add.lines = list(c("Prefecture FE?", "Yes", "Yes", "Yes", "Yes"),
                           c("Year FE?", "Yes", "Yes", "Yes", "Yes")),
          covariate.labels = c("Princeling Discount", 
                               "Logged GDP per capita",
                               "GDP growth",
                               "Logged fixed asset investment", 
                               "Logged Population",
                               "Loged real estate investment"),
          star.cutoffs = c(.10, .05, .01), 
          se = list(models[[1]]$se, models[[2]]$se, models[[3]]$se, 
                    models[[4]]$se),
          
          header = F, notes = "robust standard errors clustered by prefecture in parentheses", 
          float = F)
sink()

